************************************************************
************************************************************
*                                                          *
*      Part 2 : Generate Variables                         *
*                                                          *
************************************************************
************************************************************

cd ".\replication_folder"

****** 1. Build the alternative control-group indicators (treat_*) ******

*** 1.1 Controls: counties without coal production that sit in coal-producing states

use merged, clear

* A county-year counts as coal-producing when average_employees is non-missing.
local has_coal "!missing(average_employees)"

* Counts recorded from the original run: 254 counties produced coal between 2001
* and 2011. Washington stopped producing in 2006, so there are 25 coal states in
* 2011 and 26 when all years up to 2011 are included. 13 counties started to
* produce coal only after 2011.
unique fips if `has_coal' & year <= 2011

* Treated = producing coal in 2011. Counts recorded from the original run: 207
* counties produced in 2011, 33 stopped permanently before 2011, and 14 were idle
* in 2011 but resumed production afterwards.
generate treat = 1 if `has_coal' & year == 2011

* First year with coal employment (only filled on producing county-years) and a
* flag that is 1 for every county of a state with at least one treated county.
egen coal_first = min(year) if `has_coal', by(fips)
egen coal_state = min(treat), by(state)

* One row per county; firstnm picks up the only non-missing value of each helper.
collapse (firstnm) treat coal_state coal_first, by(fips)

* Controls are counties in coal states that either never produced coal or began
* after 2011. A missing coal_first compares as larger than 2011 in Stata, which is
* what lets never-producing counties through the last condition.
replace treat = 0 if missing(treat) & coal_state == 1 & coal_first > 2011
unique fips, by(treat)                  // original run: treat = 207, control = 1668

* Counties that are neither treated nor controls are left out of the file.
drop if missing(treat)
keep fips treat

save fips_base, replace

*** 1.2 Controls: non-coal counties within 100 or 200 miles of a treated county

*** 1.2.1 Treatment group (counties producing coal as of 2011)

use merged, clear

* Keep only the 2011 rows that report coal employment, then flag them as treated.
keep if !missing(average_employees) & year == 2011
generate treat = 1

* Reduce to a list of distinct treated county codes.
keep fips treat
duplicates drop

save fips_treat, replace

*** 1.2.2 Distance-based control groups

use fips_treat, clear

* Attach the county-pair distances keyed on the treated county's fips and keep
* matched rows only, i.e. pairs whose fips side is a treated county.
merge 1:m fips using distance, keep(match)

* Flag the partner county (county2) as a candidate control (value 0) when it lies
* within each radius; the "donat" variant additionally requires more than 25 miles.
foreach radius in 100 200 {
    generate treat_`radius' = 0 if mi_to_county <= `radius'
}
generate treat_donat = 0 if mi_to_county > 25 & mi_to_county <= 200

* A county can be near several treated counties, so collapse to one row per
* partner county, keeping the flag if any pair set it.
collapse (firstnm) treat_*, by(county2)

* Remove counties that are not close to any treated county under any definition.
missings dropobs treat_*, force

* county2 is stored as text; convert it to the numeric fips key used elsewhere.
destring county2, generate(fips)
keep fips treat_*

save fips_control, replace

*** 1.3 Shale counties, following Gilje (2019)

use shale_data, clear

sort fips_county year_month

* Restrict to 2011 observations.
* NOTE(review): year() expects a daily date value; this assumes year_month is
* stored as a daily date. Confirm, since a monthly (%tm) value would need
* year(dofm(year_month)).
keep if year(year_month) == 2011

* Numeric county key plus the control-group flag (0 = shale county).
destring fips_county, generate(fips)
generate treat_shale = 0

* One row per county.
collapse (firstnm) treat_shale, by(fips)

save fips_shale, replace

*** 1.4 Oil and gas counties based on EIA

* The working directory was already set to the replication folder by the cd at
* the top of this file, so the workbook is opened by bare name like every other
* input (repeating the folder in the path would point one level too deep).
import excel "fcml.xls", sheet("2011") firstrow case(lower) clear

g cname = strtrim(countyname)                               // Trimmed county name used as merge key
rename state sab                                            // State abbreviation, named as in the crosswalk
drop if sab == "AK" | inlist(countycode,"990","995","999")  // Drop offshore fields and fields in AK
unique fieldcode                                            // Original run: 47353 unique values of fieldcode

* Attach county FIPS codes from the crosswalk; crosswalk-only rows are dropped.
merge m:1 sab cname using countyfipstool20190120
drop if _merge==2

unique fips    // Original run: 1328 unique values of fips
unique sname   // Original run: 36 unique values of sname

keep fips

* Fields that found no county in the crosswalk have a missing fips; left in, they
* would be saved as one extra "county" with a missing code.
drop if missing(fips)

* Recode 12025 to 12086 BEFORE removing duplicates: if both codes occur, recoding
* afterwards would leave fips 12086 twice and the later m:1 merge on fips would fail.
replace fips = 12086 if fips == 12025
duplicates drop

g treat_oilgas = 0     // Control-group flag; original run: 1328 oil and gas counties

save fips_oilgas, replace


****** 2 Generate variables ************************************************

* Aggregate EIA coal series (one row per year). The working directory was already
* set to the replication folder by the cd at the top of this file, so the file is
* opened by bare name like the other inputs.
import delimited "Aggregate_coal_mine_average_employees.csv", clear
tsset year
g l_coal = coalaverage_employeesustot
keep year l_coal

* Spread the national series over the county-year panel.
merge 1:m year using merged
drop _merge

* Attach the county-level control-group files built in Part 1. No keep() option
* is used, so counties present only in a using file stay in the data as extra
* rows whose panel variables (including year) are missing.
foreach f in fips_base fips_control fips_shale fips_oilgas {
    merge m:1 fips using `f'
    drop _merge
}

* coal_county = 1 for every county that reported coal employment in any year up
* to 2011. Counts recorded from the original run: 254 such counties; Washington
* stopped producing in 2006, so there are 25 coal states in 2011 and 26 when all
* years up to 2011 are included; 13 counties started to produce coal after 2011.
* (temp is dropped further down, right before merged_base is saved.)
g temp=1 if !missing(average_employees) & year<=2011
egen coal_county = min(temp), by(fips)

* For each alternative control group: coal counties of 2011 are the treated
* units, and a control county is discarded (set to missing) if it produced coal
* at any time up to 2011. What remains as 0 is either a non-coal county or a
* county that started to produce coal after 2011.
foreach v in _oilgas _shale _100 _200 _donat {
    replace treat`v' = 1 if treat == 1
    replace treat`v' = . if treat`v'==0  & coal_county==1
}

* Baseline treatment indicator: controls are oil (including shale) and gas
* counties that did not produce coal up to 2011.
g treat_oil_gas = 1 if treat==1
replace treat_oil_gas = 0 if (treat_shale==0 | treat_oilgas==0 ) & coal_county!=1

xtset fips year

* Post-period dummy. Stata treats a missing value as larger than any number, so
* without the guard every row with a missing year (e.g. counties that entered
* only through one of the control-group files) would be coded as post = 1.
g post = (year>2011) if !missing(year)

* Black population share (as a fraction of total population).
generate black_per = black/pop_tot

* County means over all available years, stored under the same name with a
* leading underscore.
local ctrl_list pop_tot black_per median_age median_inc median_home_value inequality single_parent education
foreach c of local ctrl_list {
    egen _`c' = mean(`c'), by(fips)
}

* Population groups built from the age-bracket and race-by-sex counts.
generate pop_old   = tot_pop3
generate pop_young = tot_pop1 + tot_pop2
generate pop_white = wa_male0 + wa_female0
generate pop_black = ba_male0 + ba_female0

* Application amounts by race: sum of the _1 and _3 components.
generate application_amt_white = application_amt_white_1 + application_amt_white_3
generate application_amt_black = application_amt_black_1 + application_amt_black_3

* State and local government combined: total employment and the
* employment-weighted average pay.
generate emp_s_l     = emp_st + emp_loc
generate avg_pay_s_l = (avg_pay_st*emp_st+avg_pay_loc*emp_loc)/(emp_st + emp_loc )

* ---- Outcomes indexed to a base year (base year = 100) ----------------------
* For each variable v: take the county's base-year value, carry it forward
* within the county, and express v as a percentage of it in v_r.
* NOTE(review): carryforward fills later observations only, so any rows before
* the base year keep a missing index; presumably intended, confirm.

local base2007 emp_all emp_non_coal emp_fed emp_s_l  avg_pay_all  avg_pay_non_coal avg_pay_s_l  tot_cntydep tot_cntydep_large pcpi popestimate pop_young pop_old pop_white pop_black

* avg_pay_fed_r is kept, ordered and labelled at the end of this file but was
* never constructed here, so the final keep would stop with "variable not
* found". Build it like the other wage indices unless the input data already
* provides it.
capture confirm variable avg_pay_fed_r, exact
if _rc {
    local base2007 `base2007' avg_pay_fed
}

foreach v of local base2007 {
 g `v'_2007 = `v' if year==2007
 bys fips (year): carryforward `v'_2007, replace
 g `v'_r = 100*`v'/`v'_2007
 drop `v'_2007
}

* Application amounts use 2008 as the base year.
foreach v in application_amt_1  application_amt_white_1 application_amt_black_1    {
 g `v'_2008 = `v' if year==2008
 bys fips (year): carryforward `v'_2008, replace
 g `v'_r = 100*`v'/`v'_2008
 drop `v'_2008
}

* The unemployment rate is used in levels; _r is a plain copy so that it shares
* the naming scheme of the indexed outcomes.
foreach v in un_rate  {
	g `v'_r = `v'
}

* Rescale the level variables to millions. This runs after the indices are
* built; the indices are ratios and do not depend on the scale.
foreach v in tot_cntydep tot_cntydep_large  application_amt_1 application_amt_white_1 application_amt_black_1 {
	replace `v'=`v'/1000000
}

* ---- Two-group splits measured in 2011, used for heterogeneity analysis ------
* Each indicator equals 1 for counties in the lower of two quantile groups of the
* 2011 distribution and is copied to all years of the county.

* Bank branches: all branches (low_atf_bra) and the large_bra count
* (low_large_bra). Missing counts are treated as zero.
local branch_src tot_cntybra large_bra
local branch_out low_atf_bra low_large_bra
forvalues k = 1/2 {
    local src : word `k' of `branch_src'
    local out : word `k' of `branch_out'

    tempvar amt half below
    generate `amt' = `src'
    replace `amt' = 0 if `amt' == .
    egen `half' = xtile(`amt') if year == 2011, n(2)
    generate `below' = (`half' == 1) if !missing(`half')
    egen `out' = mean(`below'), by(fips)
    drop `amt' `half' `below'
}

* Rural indicator: county mean of cfpb, with counties lacking any value set to 0.
egen rural = mean(cfpb), by(fips)
replace rural = 0 if missing(rural)

* Mobility (abs and rel measures): lower quantile group of the 2011 distribution.
foreach kind in abs rel {
    tempvar half below
    egen `half' = xtile(mobility_`kind') if year == 2011, n(2)
    generate `below' = (`half' == 1) if !missing(`half')
    egen low_mob_`kind' = mean(`below'), by(fips)
    drop `half' `below'
}

* Treatment intensity: the county's 2011 coal employment as a percentage of the
* national aggregate (l_coal), copied to every year of the county. Control
* counties of the coal-state comparison (treat == 0) get an intensity of zero.
tempvar share2011
generate `share2011' = 100*average_employees/l_coal if year == 2011
egen share = mean(`share2011'), by(fips)
replace share = 0 if treat == 0

* Remove the helper built here and the coal-production helper (temp) created
* when coal_county was defined.
drop `share2011' temp

save merged_base, replace


****** 3 Generate PSM sample ***********************
* Builds a propensity-score-matched control group for the 2011 coal counties and
* stores it in treat_psm (1 = treated, 0 = matched control, missing = not in the
* matched sample).

use merged_base, clear

* Candidate controls for matching: every county without a treat value that did
* not produce coal up to 2011. This change is local to the matching step; treat
* is reloaded unchanged from merged_base further down.
replace treat=0 if treat==. & coal_county!=1
* Matching uses 2007-2011 only. tin() relies on the panel time settings that
* were declared with xtset before merged_base was saved.
keep if tin(2007,2011)

* Black population share in percent, then one row per county with one column
* per covariate and year.
g black_sh = 100*(ba_male0+ba_female0)/tot_pop0
keep  fips  year treat popestimate pcpi un_rate black_sh
reshape wide popestimate un_rate  pcpi black_sh, i(fips) j(year)

* Fixed seed so that the random ordering, and with it the match, is reproducible.
set seed 12345
gen sort=uniform()									// Random sort (necessary for psmatch2 to work well).
sort sort

* One-to-one nearest-neighbour matching without replacement on all yearly
* covariates, followed by the balance table and the propensity-score graph.
* psmatch2, pstest and psgraph are not defined in this file; presumably the
* user-written psmatch2 package must be installed.
psmatch2 treat  popestimate* un_rate* pcpi* black_sh* , neighbor(1)  noreplacement	
pstest , both 
psgraph

* Collect the identifiers of the observations that were picked as a match.
* NOTE(review): _n1, _id and _support are created by psmatch2; _n1 presumably
* holds the _id of the matched neighbour. Confirm against the psmatch2 help file.
preserve
	keep _n1
	rename _n1 _id
	drop if _id==.
	duplicates drop
	tempfile temp
	save `temp', replace
restore

* _merge==3 marks the observations whose _id appears in the list of matches.
merge 1:1 _id using `temp'		 
g psm = 1 if (treat==1 & _support==1) |(treat==0 & _merge==3)   // Generate indicator for the psm sample

keep fips psm  

* Bring the county-level flag back to the full county-year panel.
merge 1:m fips using merged_base	
drop _merge
xtset fips year

* Treated counties keep 1; matched controls get 0; all other counties stay missing.
g treat_psm = 1 if treat == 1
replace treat_psm = 0 if mi(treat_psm)  & psm==1
unique fips, by(treat_psm)            // 1:207; 0:206

* ---- Final variable selection and column order -----------------------------
* The variable groups are named once and reused for both keep and order.

local id_vars     fips year lat lng state post
local coal_vars   n_close n_active production average_employees labor_hours
local treat_vars  treat_oil_gas treat treat_100 treat_200 treat_psm

* Indexed outcomes (base year = 100): headline set and sector breakdown.
local idx_main    emp_all_r avg_pay_all_r popestimate_r pop_old_r pop_young_r pop_white_r pop_black_r application_amt_1_r application_amt_white_1_r application_amt_black_1_r
local idx_sector  emp_non_coal_r avg_pay_non_coal_r emp_fed_r avg_pay_fed_r emp_s_l_r avg_pay_s_l_r

* The same outcomes in levels.
local lvl_main    emp_all avg_pay_all popestimate pop_old pop_young pop_white pop_black application_amt_1 application_amt_white_1 application_amt_black_1
local lvl_sector  emp_non_coal avg_pay_non_coal emp_fed avg_pay_fed emp_s_l avg_pay_s_l

* Sample-split indicators, county characteristics and their county means.
local split_vars  low_atf_bra low_large_bra rural low_mob_rel
local char_vars   pop_tot black_per median_age median_inc median_home_value inequality single_parent education
local char_means  _pop_tot _black_per _median_age _median_inc _median_home_value _inequality _single_parent _education

keep `id_vars' `coal_vars' `treat_vars' share ///
     `idx_main' `idx_sector' `lvl_main' `lvl_sector' ///
     `split_vars' `char_vars' `char_means'

* Identifiers, coal variables, treatment measures and indexed outcomes come first.
order `id_vars' `coal_vars' `treat_vars' share `idx_main' `idx_sector', first

compress

* Variable labels. They are user-facing text, so the misspellings in the
* population and application labels are corrected and their capitalisation is
* made consistent.

* Outcome variables
label var emp_all_r "Employment"
label var avg_pay_all_r "Wages"
label var popestimate_r "Population All"
label var pop_old_r ">=65"
label var pop_young_r "<65"
label var pop_white_r "Population White"
label var pop_black_r "Population Black"
label var application_amt_1_r "Application All"
label var application_amt_white_1_r "Application White"
label var application_amt_black_1_r  "Application Black"
label var emp_non_coal_r "Non-Mining Employment"
label var avg_pay_non_coal_r "Non-Mining Wages"
label var emp_fed_r "Federal Employment"
label var avg_pay_fed_r "Federal Wages"
label var emp_s_l_r "State and Local Employment"
label var avg_pay_s_l_r "State and Local Wages"

* Treatment measures
label var treat_oil_gas "Baseline"
label var treat "Coal States"
label var treat_100 "Within 100 Miles"
label var treat_200 "Within 200 Miles"
label var treat_psm "PSM"
label var share "Treatment Intensity"

* Access to finance and mobility
label var low_atf_bra "1 (Low_A2F)"
label var low_large_bra "1 (Low_A2F_LB)"
label var rural "1 (Rural)"
label var low_mob_rel "1 (Low_Mobility)"

* This overwrites the merged_base file that Part 3 loaded. The reduced file no
* longer holds the matching covariates, so Part 3 cannot be rerun on its own
* without rerunning Part 2 first.
save merged_base, replace

